	function bitspec = shareadders(bitspec, archspec, verbose)
	%bitspec = shareadders(bitspec, archspec, verbose)
	%
	%Shares adders in and between branches by finding common subexpressions.
	%The sharing is done after determining the partial products, but before
	%specifying the partial product reduction tree inputs.
	%
	%In each pass, every combination of three partial products found in the
	%same ppmap cell is normalized and counted over all branches. The most
	%frequent combination is turned into one shared adder: its three inputs
	%are removed from every cell where it was counted, and the adder outputs
	%are appended to the cells on the next row. Passes are repeated until no
	%combination occurs more than once.
	%
	%See [2] for a detailed description of how the sharing is done.
	%
	%Arguments:
	%  bitspec - uses fields:
	%    numout: number of output branches
	%    branches{k}:
	%      ppmap: partial product map. ppmap.bits is a cell matrix; every
	%        non-empty cell holds one partial product per row, laid out as
	%        [source, element(s)..., scale]. On input, source must be 0,
	%        the elements must be >= 0 and scale must be -1 or 1. All
	%        non-empty cells must have the same width (>= 3).
	%  archspec - uses fields:
	%    maxheight: rows of the ppmap are grouped in runs of maxheight rows;
	%      a subexpression is only shared between rows at the same position
	%      within their run.
	%  verbose - verbosity level (0-2) [default: 0]
	%
	%Returns:
	%  bitspec - added fields:
	%    shared: shared adders map, one row per shared adder:
	%      [pp1, pp2, pp3, flag], where pp1..pp3 are the normalized input
	%      partial products and flag is 1 if the adder sits on the last row
	%      of a run of maxheight rows (counted as "regs removed" in the
	%      verbose output), otherwise 0.
	%  bitspec - modified fields:
	%    branches{k}.ppmap: shared adder inputs removed, shared adder
	%      outputs added as partial products with source 1.
	
	%Copyright 2008, 2010 Anton Blad
	%
	%This file is part of firgen.
	%
	%firgen is free software: you can redistribute it and/or modify
	%it under the terms of the GNU General Public License as published by
	%the Free Software Foundation, either version 3 of the License, or
	%(at your option) any later version.
	%
	%firgen is distributed in the hope that it will be useful,
	%but WITHOUT ANY WARRANTY; without even the implied warranty of
	%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	%GNU General Public License for more details.
	%
	%You should have received a copy of the GNU General Public License
	%along with firgen.  If not, see <http://www.gnu.org/licenses/>
	
	if nargin < 3
		verbose = 0;
	end
	
	% wpp is the width of the ppmap.bits matrix (0 until the first non-empty
	% cell has been seen)
	wpp = 0;
	
	% ppmax is the maximum of the values in the middle columns of the
	% ppmap.bits matrix. It is resized to 1-by-(wpp-2) as soon as wpp is known.
	ppmax = zeros(1,0);
	
	% list of shared adders
	sha = [];
	
	% number of removed adders and registers
	facount = 0;
	regcount = 0;
	
	% Go through all ppmaps to determine wpp and ppmax
	for b = 1:bitspec.numout
		ppmap = bitspec.branches{b}.ppmap;
	
		numrows = size(ppmap.bits, 1);
		numcols = size(ppmap.bits, 2);
	
		for row = 1:numrows
			for col = 1:numcols
				pp = ppmap.bits{row,col};
	
				if size(pp,2) > 0
					% Determine pp field width
					if wpp == 0
						wpp = size(pp, 2);
						if wpp < 3
							error(sprintf('shareadders: partial product element width too small (%d), must be >= 3', wpp));
						end
						% All middle elements are verified to be >= 0 below, so
						% starting the running maximum at zero does not affect it.
						ppmax = zeros(1, wpp-2);
					end
	
					% Verify that pp field width is uniform
					if size(pp,2) ~= wpp
						error('shareadders: partial product element width must be uniform');
					end
	
					% Sanity check for pp field elements
					if any(pp(:,1) ~= 0)
						error(sprintf('shareadders: invalid source in partial product (branch %d, pos (%d,%d)), only 0 allowed', b, row, col));
					end
					% The middle columns form a matrix when wpp > 3, so reduce over
					% all elements; "if" on a vector is true only if every entry is.
					ppmid = pp(:,2:end-1);
					if any(ppmid(:) < 0)
						error(sprintf('shareadders: invalid partial product element (branch %d, pos (%d,%d)), must be >= 0', b, row, col));
					end
					if any(pp(:,end) ~= -1 & pp(:,end) ~= 1)
						error(sprintf('shareadders: invalid partial product scale (branch %d, pos (%d,%d)), only -1 and 1 allowed', b, row, col));
					end
	
					% Find the maximum element in all pp fields
					ppmax = max([ppmax;ppmid], [], 1);
				end
			end
		end
	end
	
	% ppscale is used to determine a total ordering of partial products
	ppscale = [2*fliplr(cumprod(fliplr(ppmax+1))) 2 1];
	
	while 1
		% List containing subexpressions (three partial products plus the
		% row position within a run of archspec.maxheight rows)
		selist = zeros(0,3*wpp+1);
		% List containing subexpression frequencies
		secnt = [];
		% List containing the location of subexpressions; each row of
		% seidx{k} is [branch row col k1 k2 k3 sign]
		seidx = {};
	
		% Go through all branches
		for b = 1:bitspec.numout
			ppmap = bitspec.branches{b}.ppmap;
	
			numrows = size(ppmap.bits, 1);
			numcols = size(ppmap.bits, 2);
	
			% Check all positions of the branch
			for row = 1:numrows
				% Position of the row within its run of maxheight rows (1-based)
				delay = mod((row-1), archspec.maxheight)+1;
				for col = 1:numcols
					pp = ppmap.bits{row,col};
					numpp = size(pp, 1);
	
					% If there are at least three partial products, add all combinations
					% of normalized subexpressions to the subexpression frequency list
					if numpp >= 3
						for k1=1:(numpp-2)
							for k2=(k1+1):(numpp-1)
								for k3=(k2+1):numpp
									pp1 = pp(k1,:);
									pp2 = pp(k2,:);
									pp3 = pp(k3,:);
									ppm = [pp1;pp2;pp3];
	
									% Sort the partial products in ascending order of the
									% magnitude of their ordering value
									ppv = [sum(ppscale.*pp1), sum(ppscale.*pp2), sum(ppscale.*pp3)];
									[t,order] = sort(abs(ppv));
									pps = ppm(order,:);
									ppsign = pps(1,end);
	
									% Normalize so that the first partial product in sorted
									% order has scale +1; the original sign is kept in ppsign
									pps(:,end) = pps(:,end) * ppsign;
	
									% Create the partial product id vector
									ppid = [pps(1,:), pps(2,:), pps(3,:), delay];
	
									% Find if ppid already exists in the subexpression list
									pploc = find(all(repmat(ppid, size(selist,1), 1)==selist, 2));
	
									if isempty(pploc)
										% If not, add it to the list and store the occurrence in seidx
										selist(end+1,:) = ppid;
										secnt(end+1) = 1;
										seidx{end+1} = [b row col k1 k2 k3 ppsign];
									else
										% If it does, check if any of the partial products have been
										% marked for the subexpression for this position already. (Can
										% happen if the same partial product occurs several times in
										% the same position). If none of the partial products have been
										% marked, add the subexpression occurrence to the relevant
										% element in seidx.
										t = seidx{pploc};
										here = (t(:,1) == b & t(:,2) == row & t(:,3) == col);
										kused = t(here,4:6);
										if ~any(any(kused==k1 | kused==k2 | kused==k3))
											secnt(pploc) = secnt(pploc) + 1;
											t = [t; b row col k1 k2 k3 ppsign];
											seidx{pploc} = t;
										end
									end
								end
							end
						end
					end
				end
			end
		end
	
		if verbose >= 2
			disp(sprintf('Considering %d unique normalized subexpressions', size(selist, 1)));
		end
	
		% Find the maximum frequency subexpression
		[count,idx] = max(secnt);
	
		% If there are no more shared subexpressions, return. The short-circuit
		% operator is required: count is empty when no subexpression was found,
		% and an element-wise OR with an empty operand yields an empty (false)
		% condition.
		if isempty(count) || count <= 1
			bitspec.shared = sha;
			if verbose >= 2
				disp('No more shared subexpressions found');
			end
			if verbose >= 1
				disp(sprintf('Found %d shared subexpressions. %d adders removed, %d regs removed', size(sha, 1), facount, regcount));
			end
			return
		end
	
		% Store the partial products of the chosen subexpression. The last
		% element is replaced by a flag that is 1 when the subexpression sits
		% on the last row of a run of maxheight rows.
		se = selist(idx,:);
		se(3*wpp+1) = (mod(se(3*wpp+1), archspec.maxheight) == 0);
		sha(end+1,:) = se;
	
		if verbose >= 2
			t1 = se(1:wpp);
			t2 = se(wpp+1:2*wpp);
			t3 = se(2*wpp+1:3*wpp);
			disp(sprintf('Sharing subexpression (%d%s),(%d%s),(%d%s): %d occurences', ...
					t1(1), sprintf(',%d', t1(2:end)), ...
					t2(1), sprintf(',%d', t2(2:end)), ...
					t3(1), sprintf(',%d', t3(2:end)), ...
					count));
		end
	
		% Update the number of removed adders and registers
		facount = facount + count - 1;
		if se(end) == 1
			regcount = regcount + count - 1;
		end
	
		% Retrieve the occurrences of the subexpression
		sepp = seidx{idx};
	
		% Index of the adder in the shared adder list (zero-based)
		shaidx = size(sha, 1)-1;
	
		% Go through all branches and replace the subexpressions
		for b = 1:bitspec.numout
			ppmap = bitspec.branches{b}.ppmap;
			numrows = size(ppmap.bits, 1);
			numcols = size(ppmap.bits, 2);
	
			for row = 1:numrows
				for col = 1:numcols
					% Find the shared subexpressions for the current branch, row and column
					pprepidx = find(all(repmat([b row col], size(sepp, 1), 1) == sepp(:,1:3), 2));
					pprep = sepp(pprepidx, 4:7);
	
					% List of partial products to be removed
					ppremove = pprep(:, 1:3);
					ppremove = ppremove(:);
	
					% Add the shared adder outputs to the reduction tree on the level below.
					% New entries are appended, so the row indices stored in seidx for the
					% cells on the next row remain valid.
					% NOTE(review): for wpp == 3, t(end-1) is the same element as t(2), so
					% the adder index is overwritten by the output selector - confirm that
					% sharing is only used with wpp >= 4.
					for r = 1:size(pprep, 1)
						if row+1 > size(ppmap.bits, 1)
							ppmap.bits{row+1,1} = [];
						end
						% Output with selector 0 goes to the same column
						t = zeros(1,wpp);
						t(1:2) = [1 shaidx];
						t(end-1) = 0;
						t(end) = pprep(r,4);
						ppmap.bits{row+1,col} = [ppmap.bits{row+1,col};t];
						% Output with selector 1 goes to the column to the left, and is
						% dropped when there is no such column
						if col > 1
							t = zeros(1,wpp);
							t(1:2) = [1 shaidx];
							t(end-1) = 1;
							t(end) = pprep(r,4);
							ppmap.bits{row+1,col-1} = [ppmap.bits{row+1,col-1};t];
						end
					end
	
					% Remove the inputs to the shared adders from the reduction tree
					if ~isempty(ppremove)
						t = ppmap.bits{row,col};
						t(ppremove, :) = [];
						ppmap.bits{row,col} = t;
					end
				end
			end
			bitspec.branches{b}.ppmap = ppmap;
		end
	end
	
